"""
Build .docx from paper.md + output tables.
Commodity Demographics Paper.
"""

import re
import os
from docx import Document
from docx.shared import Pt, Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
import pandas as pd

# Absolute project root shared by the input and output locations below.
_PROJECT_ROOT = '/mnt/c/demographics_capital_flows/commodity_demographics'

# Holds paper.md; the generated .docx is written here as well.
PAPER_DIR = _PROJECT_ROOT + '/paper'
# Searched for the CSV files named by {table:...} placeholders in paper.md.
TABLE_DIR = _PROJECT_ROOT + '/output/tables'

# Inline emphasis spans.  The **bold** alternative is listed first so that a
# double asterisk is not consumed as an empty *italic* span.
_INLINE_MARKUP_RE = re.compile(r'(\*\*.*?\*\*|\*.*?\*)')
# Placeholder line of the form {table:some_file.csv}.
_TABLE_REF_RE = re.compile(r'\{table:(.+?)\}')
# One cell of a Markdown header-separator row, e.g. ---, :--- or :---:.
_SEPARATOR_CELL_RE = re.compile(r':?-+:?')

_FONT_NAME = 'Times New Roman'
_TABLE_STYLE = 'Light Grid Accent 1'
_BODY_PT = 12


def _style_run(run, size, bold=False, italic=False):
    """Apply the document font to *run*; bold/italic are only ever switched on."""
    run.font.name = _FONT_NAME
    run.font.size = Pt(size)
    if bold:
        run.bold = True
    if italic:
        run.italic = True


def _add_inline_runs(paragraph, text):
    """Append *text* to *paragraph*, honouring **bold** and *italic* spans."""
    # re.split with one capture group alternates plain text (even indices)
    # and captured markup (odd indices).  Deciding by position rather than by
    # re-inspecting the text keeps a stray literal '*' from being mistaken
    # for an empty italic span and silently dropped.
    for idx, part in enumerate(_INLINE_MARKUP_RE.split(text)):
        if idx % 2 == 0:
            if part:
                _style_run(paragraph.add_run(part), _BODY_PT)
        elif part.startswith('**') and part.endswith('**'):
            _style_run(paragraph.add_run(part[2:-2]), _BODY_PT, bold=True)
        else:
            _style_run(paragraph.add_run(part[1:-1]), _BODY_PT, italic=True)


def _add_table(doc, rows, size):
    """Append a grid table built from *rows* (lists of str); row 0 is bold."""
    ncols = max((len(r) for r in rows), default=0)
    if ncols == 0:
        return
    table = doc.add_table(rows=len(rows), cols=ncols)
    table.style = _TABLE_STYLE
    for ri, values in enumerate(rows):
        # Look the row's cells up once instead of once per column.
        cells = table.rows[ri].cells
        for ci, text in enumerate(values):
            cell = cells[ci]
            cell.text = text
            for paragraph in cell.paragraphs:
                for run in paragraph.runs:
                    _style_run(run, size, bold=(ri == 0))
    # Empty paragraph so the following text does not touch the table.
    doc.add_paragraph('')


def _format_csv_value(val):
    """Return the cell text for one CSV value."""
    if isinstance(val, float):
        if pd.isna(val):
            # Missing value: leave the cell blank rather than printing "nan".
            return ''
        if abs(val) < 0.01 and val != 0:
            return f"{val:.4f}"
        if abs(val) > 1000:
            return f"{val:,.0f}"
        return f"{val:.3f}"
    return str(val)


def _add_csv_table(doc, table_file, max_rows=40, max_cols=10):
    """Append the CSV *table_file* from TABLE_DIR as a table.

    Only the first *max_rows* rows and *max_cols* columns are kept.  A
    bracketed placeholder paragraph is written instead when the file is
    missing or cannot be loaded.
    """
    table_path = os.path.join(TABLE_DIR, table_file)
    if not os.path.exists(table_path):
        doc.add_paragraph(f"[Table {table_file}: file not found]")
        return
    try:
        df = pd.read_csv(table_path).iloc[:max_rows, :max_cols]
        rows = [[str(col) for col in df.columns]]
        # itertuples keeps each column's dtype; iterrows would upcast integer
        # columns to float in an all-numeric frame (120 -> "120.000").
        rows.extend(
            [_format_csv_value(val) for val in record]
            for record in df.itertuples(index=False, name=None)
        )
        _add_table(doc, rows, 9)
    except Exception as e:
        # Deliberately broad: one bad table must not abort the whole build.
        doc.add_paragraph(f"[Table {table_file}: error loading — {e}]")


def _parse_markdown_table(table_lines):
    """Split stripped ``|``-prefixed lines into rows of cell strings.

    Header-separator rows (every cell made of dashes with optional colons)
    are omitted from the result.
    """
    rows = []
    for tl in table_lines:
        body = tl[1:]
        # The closing pipe is optional; only strip it when it is present so
        # the last cell of "| a | b" is not lost.
        if body.endswith('|'):
            body = body[:-1]
        cells = [c.strip() for c in body.split('|')]
        # Every cell must look like a separator; testing only the first one
        # would discard rows whose first cell happens to be blank.
        if all(_SEPARATOR_CELL_RE.fullmatch(c) for c in cells):
            continue
        rows.append(cells)
    return rows


def build():
    """Convert ``paper.md`` into ``commodity_demographics.docx``.

    Reads ``PAPER_DIR/paper.md`` line by line and maps a small Markdown
    subset onto Word elements:

    * ``# ``  -> centred title; ``## `` / ``### `` -> level 1 / 2 headings
    * ``{table:NAME}`` -> table built from the CSV ``TABLE_DIR/NAME``
      (first 40 rows and 10 columns only)
    * consecutive lines starting with ``|`` -> Markdown table
    * ``- `` -> bulleted paragraph
    * any other non-empty line -> body paragraph

    ``**bold**`` and ``*italic*`` spans are honoured in bullets and body
    paragraphs.  The document is saved into PAPER_DIR and its path and size
    are printed.  Errors from reading ``paper.md`` or saving the document
    are not caught.
    """
    doc = Document()

    # Styles
    normal_font = doc.styles['Normal'].font
    normal_font.name = _FONT_NAME
    normal_font.size = Pt(_BODY_PT)
    for style_name, size in (('Heading 1', 14), ('Heading 2', 13)):
        heading_font = doc.styles[style_name].font
        heading_font.name = _FONT_NAME
        heading_font.size = Pt(size)

    # utf-8-sig reads plain UTF-8 too, but strips a leading BOM that would
    # otherwise hide the '# ' prefix of the first line.
    with open(os.path.join(PAPER_DIR, 'paper.md'), 'r', encoding='utf-8-sig') as f:
        lines = f.readlines()

    total = len(lines)
    i = 0
    while i < total:
        line = lines[i].rstrip()

        # Markdown table: spans several lines, so this branch moves the
        # cursor itself (always by at least one line).
        if line.startswith('|'):
            start = i
            while i < total and lines[i].strip().startswith('|'):
                i += 1
            rows = _parse_markdown_table([raw.strip() for raw in lines[start:i]])
            if rows:
                _add_table(doc, rows, 10)
            continue

        # Every remaining construct occupies exactly one line.
        i += 1

        # Skip empty lines
        if not line:
            continue

        # Title (# heading)
        if line.startswith('# '):
            title = doc.add_heading(line[2:], level=0)
            title.alignment = WD_ALIGN_PARAGRAPH.CENTER
            continue

        # Section headings
        if line.startswith('## '):
            doc.add_heading(line[3:], level=1)
            continue
        if line.startswith('### '):
            doc.add_heading(line[4:], level=2)
            continue

        # Table reference
        table_ref = _TABLE_REF_RE.match(line)
        if table_ref:
            _add_csv_table(doc, table_ref.group(1))
            continue

        # Bulleted list
        if line.startswith('- '):
            _add_inline_runs(doc.add_paragraph(style='List Bullet'), line[2:])
            continue

        # Regular paragraph
        _add_inline_runs(doc.add_paragraph(), line)

    output_path = os.path.join(PAPER_DIR, 'commodity_demographics.docx')
    doc.save(output_path)
    print(f"Saved: {output_path}")
    print(f"Size: {os.path.getsize(output_path) / 1024:.0f} KB")

# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    build()
